import requests
from bs4 import BeautifulSoup
import pandas as pd

# Load the third-level category index produced by the previous crawl step
# (columns: first/second/third category names plus each category's page url).
# utf_8_sig tolerates/strips the BOM that Excel prepends to UTF-8 CSVs.
df=pd.read_csv('../../file/第三分类标题.csv', encoding='utf_8_sig')
# Accumulates one dict per scraped book across all categories; written out at the end.
data=[]

def get(url):
    """Fetch one category listing page and return its books.

    Parameters
    ----------
    url : str
        Absolute URL of a category page whose books live under '#book_list > a'.

    Returns
    -------
    list[dict]
        One dict per book with keys 'href', 'cover', 'title', 'author', 'des'.

    Raises
    ------
    requests.HTTPError
        If the server responds with an error status.
    """
    books = []  # renamed from 'list' — don't shadow the builtin
    # timeout prevents the crawl from hanging forever on a dead host;
    # raise_for_status surfaces HTTP errors instead of parsing an error page.
    res = requests.get(url, timeout=10)
    res.raise_for_status()
    soup = BeautifulSoup(res.text, 'lxml')
    book_list = soup.select('#book_list > a')
    for book in book_list:
        href = book.get('href')
        # NOTE(review): class_="" matches an <img> whose class attribute is
        # literally empty — presumably how this site marks the cover image;
        # confirm against the live markup before changing.
        cover = book.find('img', class_="").get('src')
        title = book.find('div', class_="title").text
        author = book.find('div', class_="author").text
        des = book.find('div', class_="des").text
        books.append({
            'href': href,
            'cover': cover,
            'title': title,
            'author': author,
            'des': des,
        })
    return books

# Walk every category row, scrape its listing page, and flatten the results
# into one record per book tagged with its full category path.
# Unpack the (index, Series) tuple from iterrows() instead of indexing row[1].
for _, row in df.iterrows():
    first_category = row['first_category']
    second_category = row['second_category']
    third_category = row['third_category']
    # The CSV stores protocol-relative urls ('//host/...'); prepend the scheme.
    url = f"https:{row['url']}"
    for item in get(url):
        data.append({
            'first_category': first_category,
            'second_category': second_category,
            'third_category': third_category,
            'title': item['title'],
            'author': item['author'],
            'cover': item['cover'],
            'href': item['href'],
            'des': item['des'],
        })

# Build the output frame under a new name rather than rebinding 'data'
# to a different type (list -> DataFrame), which obscures later reads.
# utf_8_sig writes a BOM so Excel opens the Chinese text correctly.
books_df = pd.DataFrame(data)
books_df.to_csv('图书信息.csv', encoding='utf_8_sig', index=False)
print('爬取完成')